library(plyr)
library(readr)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(ggfortify)
library(recipes)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
## 
##     step
library(embed)
library(ggplot2)
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate

# Preliminary cluster analysis

# UMAPs

# Datasets ----
# The five curated CSV files are read from one folder; the folder is named
# once so that relocating the data needs a single edit.
curated_dir <- "C:/Users/samte/OneDrive/Desktop/TFM Clustering of musical patterns/curatedData"
crotches38i <- read.csv(file.path(curated_dir, "crotches38i.csv"))
quavers38ii <- read.csv(file.path(curated_dir, "quavers38ii.csv"))
crotches38iii <- read.csv(file.path(curated_dir, "crotches38iii.csv"))
crotches99i <- read.csv(file.path(curated_dir, "crotches99i.csv"))
semiquavers99ii <- read.csv(file.path(curated_dir, "semiquavers99ii.csv"))
# Project a data set onto two UMAP components.
#
# `dataset` must contain a `performer` column and a `bar` column. `bar` is
# removed before the recipe is built, so every remaining column except
# `performer` is a UMAP predictor. After baking, `bar` is attached again as a
# factor so that it can be used for colouring.
#
# Returns the baked data: `performer`, the UMAP columns and `bar`.
# NOTE(review): no seed is set here, so the embedding may differ between
# runs -- confirm whether reproducible figures are required.
embed_umap <- function(dataset) {
  predictors <- dataset
  predictors$bar <- NULL

  umap_prep <- recipe(performer ~ ., data = predictors) |>
    step_umap(all_predictors(), num_comp = 2) |>
    prep()

  embedding <- bake(
    umap_prep,
    new_data = predictors,
    performer, starts_with("umap")
  )
  embedding$bar <- as.factor(dataset$bar)
  embedding
}

# Print the two scatter plots of a UMAP embedding: points coloured by
# performer (legend on top), then coloured by bar (legend on the left).
# Returns `embedding` invisibly.
plot_umap_pair <- function(embedding) {
  # Layers shared by both plots; theme_bw() must come before the
  # legend.position tweak because it replaces the whole theme.
  shared_layers <- list(geom_point(alpha = .5), theme_bw())

  by_performer <- ggplot(
    embedding,
    aes(x = UMAP1, y = UMAP2, col = performer)
  ) +
    shared_layers +
    theme(legend.position = "top") +
    coord_equal()

  by_bar <- ggplot(embedding, aes(x = UMAP1, y = UMAP2, col = bar)) +
    shared_layers +
    theme(legend.position = "left") +
    coord_equal()

  # ggplot objects are not auto-printed inside a function or a loop.
  print(by_performer)
  print(by_bar)
  invisible(embedding)
}

# Same pair of plots for every curated data set, in the original order.
umap_datasets <- list(
  crotches38i,
  quavers38ii,
  crotches38iii,
  crotches99i,
  semiquavers99ii
)
for (dataset in umap_datasets) {
  plot_umap_pair(embed_umap(dataset))
}

library(NbClust)
# Let every NbClust index vote on the number of k-means clusters (2 to 10)
# for columns 2-7 of crotches99i, using Euclidean distances.
multi <- crotches99i[, 2:7]
nb99i <- NbClust(
  data = multi,
  diss = NULL,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "kmeans",
  index = "all",
  alphaBeale = 0.1
)
## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 9 proposed 2 as the best number of clusters 
## * 6 proposed 3 as the best number of clusters 
## * 1 proposed 5 as the best number of clusters 
## * 2 proposed 6 as the best number of clusters 
## * 3 proposed 7 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## * 1 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************
# Attach the partition selected by NbClust to every row, then split the data
# by cluster so that performers and bars can be inspected per cluster.
crotches99i$nbclust <- nb99i$Best.partition

cluster1 <- crotches99i[crotches99i$nbclust == 1, ]
cluster2 <- crotches99i[crotches99i$nbclust == 2, ]

table(cluster1$performer)
## 
##   CH  DPB   FB   GG  GGu   IH   MA   MG   ML   PN PR66   PV   RS   SB   SS 
##  178  161  132  175  166  157  152  157  156  175  171  164  156  178  163
table(cluster2$performer)
## 
##   CH  DPB   FB   GG  GGu   IH   MA   MG   ML   PN PR66   PV   RS   SB   SS 
##   97  114   78  100  109  118  123  118  119  100  104  111  119   97  112
# Share of each performer's rows that fall in cluster 2 (cluster 2 count over
# the cluster 1 + cluster 2 count). One cross-tabulation is used instead of a
# loop over a hard-coded number of performers: it covers every performer in
# the data and keeps the two counts aligned even if a performer is missing
# from one of the clusters.
performer_by_cluster <- table(crotches99i$performer, crotches99i$nbclust)
in_cluster1 <- performer_by_cluster[, "1"]
in_cluster2 <- performer_by_cluster[, "2"]
# as.numeric() drops the performer names so `prop` stays a plain vector.
prop <- as.numeric(in_cluster2 / (in_cluster1 + in_cluster2))

hist(cluster1$bar, breaks = 50)
hist(cluster2$bar, breaks = 50)

prop
##  [1] 0.3527273 0.4145455 0.3714286 0.3636364 0.3963636 0.4290909 0.4472727
##  [8] 0.4290909 0.4327273 0.3636364 0.3781818 0.4036364 0.4327273 0.3527273
## [15] 0.4072727
# Let every NbClust index vote on the number of k-means clusters (2 to 10)
# for columns 2-9 of crotches38i, using Euclidean distances.
multi <- crotches38i[, 2:9]
nb38i <- NbClust(
  data = multi,
  diss = NULL,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "kmeans",
  index = "all",
  alphaBeale = 0.1
)
## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced

## Warning in log(det(P)/det(W)): NaNs produced
## Warning in max(DiffLev[, 3], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 
## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
## 
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 2 proposed 2 as the best number of clusters 
## * 1 proposed 3 as the best number of clusters 
## * 1 proposed 8 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************
# Attach the partition selected by NbClust to every row, then split the data
# by cluster so that performers and bars can be inspected per cluster.
crotches38i$nbclust <- nb38i$Best.partition

cluster1 <- crotches38i[crotches38i$nbclust == 1, ]
cluster2 <- crotches38i[crotches38i$nbclust == 2, ]

table(cluster1$performer)
## 
##   CH  DPB   FB FvdP   GG  GGr  GGu   IH   MA   MG   ML   MP   PN PR36 PR66   PS 
##  219  136  156  150  208  182  174  190  130  182  183  192  198  145  171  164 
##   PV   RS   SB   SS 
##  215  194  162  214
table(cluster2$performer)
## 
##   CH  DPB   FB FvdP   GG  GGr  GGu   IH   MA   MG   ML   MP   PN PR36 PR66   PS 
##  151  144  124  130  162  188  196  180  150  188  187  178  172  135  109  116 
##   PV   RS   SB   SS 
##  155  176  118  156
# Share of each performer's rows that fall in cluster 2 (cluster 2 count over
# the cluster 1 + cluster 2 count). This data set has 20 performers, so the
# share is derived from one cross-tabulation covering all of them rather than
# from a loop over a fixed count of 15; the cross-tabulation also keeps the
# two counts aligned even if a performer is missing from one of the clusters.
performer_by_cluster <- table(crotches38i$performer, crotches38i$nbclust)
in_cluster1 <- performer_by_cluster[, "1"]
in_cluster2 <- performer_by_cluster[, "2"]
# as.numeric() drops the performer names so `prop` stays a plain vector.
prop <- as.numeric(in_cluster2 / (in_cluster1 + in_cluster2))

hist(cluster1$bar, breaks = 50)
hist(cluster2$bar, breaks = 50)

prop
##  [1] 0.4081081 0.5142857 0.4428571 0.4642857 0.4378378 0.5081081 0.5297297
##  [8] 0.4864865 0.5357143 0.5081081 0.5054054 0.4810811 0.4648649 0.4821429
## [15] 0.3892857 0.4142857 0.4189189 0.4756757 0.4214286 0.4216216
library(kohonen)
# A single 20 x 20 hexagonal grid is shared by every SOM trained below.
som_grid <- somgrid(xdim = 20, ydim = 20, topo = "hexagonal")

# Column ranges of each data set that are fed to the SOM, listed in the
# order in which the maps are trained and plotted.
som_inputs <- list(
  crotches99i[, 2:7],
  semiquavers99ii[, 2:17],
  crotches38i[, 2:9],
  quavers38ii[, 2:13],
  crotches38iii[, 2:9]
)

for (features in som_inputs) {
  # Train the SOM for 100 iterations, then draw the default plot followed by
  # the neighbour-distance (U-matrix) plot.
  som_model <- som(X = as.matrix(features), grid = som_grid, rlen = 100)
  plot(som_model)
  plot(
    som_model,
    type = "dist.neighbours",
    main = "U-Matrix for unsupervised SOM",
    keepMargins = TRUE,
    shape = "straight"
  )
}

# The two "unor" CSV files are read from the project folder; the folder is
# named once so that relocating the data needs a single edit.
project_dir <- "C:/Users/samte/OneDrive/Desktop/TFM Clustering of musical patterns"
unor38i <- read.csv(file.path(project_dir, "unor38i.csv"))
unor38iii <- read.csv(file.path(project_dir, "unor38iii.csv"))

# Embed `data` in two UMAP components and return a scatter plot of the
# embedding coloured by performer (legend on top).
#
# `data` must contain a `performer` column; every other column is used as a
# UMAP predictor. The plot is returned, not printed, so a top-level call
# displays it through auto-printing.
# NOTE(review): no seed is set here, so the embedding may differ between
# runs -- confirm whether reproducible figures are required.
plot_umap_by_performer <- function(data) {
  umap_prep <- recipe(performer ~ ., data = data) |>
    step_umap(all_predictors(), num_comp = 2) |>
    prep()

  embedding <- bake(
    umap_prep,
    new_data = data,
    performer, starts_with("umap")
  )

  ggplot(embedding, aes(x = UMAP1, y = UMAP2, col = performer)) +
    geom_point(alpha = .5) +
    theme_bw() +
    theme(legend.position = "top") +
    coord_equal()
}

# Both data sets stacked together.
united <- rbind(unor38i, unor38iii)
sinbar <- united
# NOTE(review): the column removed here is `bars`, whereas the curated data
# sets earlier in this script use `bar` -- confirm the column name in the
# unor files, otherwise nothing is dropped.
sinbar$bars <- NULL
plot_umap_by_performer(sinbar)

# Each data set on its own, restricted to its first 17 columns.
plot_umap_by_performer(unor38i[, 1:17])
plot_umap_by_performer(unor38iii[, 1:17])